pacman::p_load(tmap, sf, tidyverse, spatstat, spNetwork, classInt, viridis,
maptools, raster, arrow)Take-home Assignment 01: Discovering geographical distribution of Grab hailing services in Singapore
Background
In this exploration, we investigate the geographical and spatio-temporal distribution of Grab hailing services in Singapore, leveraging the rich dataset provided by GRAB known as Grab Posisi. As a significant shared taxi operator in Southeast Asia, GRAB’s dataset offers a unique perspective on human mobility. This study focuses on applying Spatial Point Pattern Analyses (KDE/NKDE) to discern patterns within the dataset.
Install Packages & Importing Data
Install Necessary Packages
For this exercise, we will be using the following packages:
sf for importing, managing, and processing geospatial data through simple features.
tidyverse for comprehensive data science tasks, including importing, wrangling, and visualizing spatial data.
spatstat for point pattern analysis, offering a wide range of functions for exploring spatial patterns.
spNetwork for analyzing spatial networks and their properties.
classInt for determining class intervals for mapping and visualization purposes.
viridis for providing color maps suitable for creating visually appealing visualizations.
maptools for manipulating geographic data, offering a set of tools for various spatial operations.
raster for reading, writing, manipulating, and analyzing gridded spatial data in a raster format.
arrow for reading parquet files and to load the dataset
Importing Data
GrabPosisi Data (Pick Up Location)
#GrabPosisi Data
df <- arrow::read_parquet("/dljyuan/IS415-GAA/data/GrabPosisi/part-00000.parquet")
df2 <- arrow::read_parquet("/dljyuan/IS415-GAA/data/GrabPosisi/part-00001.parquet")
df3 <- arrow::read_parquet("/dljyuan/IS415-GAA/data/GrabPosisi/part-00002.parquet")
df4 <- arrow::read_parquet("/dljyuan/IS415-GAA/data/GrabPosisi/part-00003.parquet")
df5 <- arrow::read_parquet("/dljyuan/IS415-GAA/data/GrabPosisi/part-00004.parquet")
df6 <- arrow::read_parquet("/dljyuan/IS415-GAA/data/GrabPosisi/part-00005.parquet")
df7 <- arrow::read_parquet("/dljyuan/IS415-GAA/data/GrabPosisi/part-00006.parquet")
df8 <- arrow::read_parquet("/dljyuan/IS415-GAA/data/GrabPosisi/part-00007.parquet")
df9 <- arrow::read_parquet("/dljyuan/IS415-GAA/data/GrabPosisi/part-00008.parquet")
df10 <- arrow::read_parquet("/dljyuan/IS415-GAA/data/GrabPosisi/part-00009.parquet")Convert time variable to datetime format
df$pingtimestamp <- as_datetime(df$pingtimestamp)
df2$pingtimestamp <- as_datetime(df2$pingtimestamp)
df3$pingtimestamp <- as_datetime(df3$pingtimestamp)
df4$pingtimestamp <- as_datetime(df4$pingtimestamp)
df5$pingtimestamp <- as_datetime(df5$pingtimestamp)
df6$pingtimestamp <- as_datetime(df6$pingtimestamp)
df7$pingtimestamp <- as_datetime(df7$pingtimestamp)
df8$pingtimestamp <- as_datetime(df8$pingtimestamp)
df9$pingtimestamp <- as_datetime(df9$pingtimestamp)
df10$pingtimestamp <- as_datetime(df10$pingtimestamp)Extracting origin location (Pickup Points)
# Code repeated for all the other df
origin_df <- df %>%
group_by(trj_id) %>%
arrange(pingtimestamp) %>%
filter(row_number()==1) %>%
mutate(weekday = wday(pingtimestamp,
label=TRUE,
abbr=TRUE),
start_hr = factor(hour(pingtimestamp)),
day = factor(mday(pingtimestamp)))Combine all df together
combined_origin_df <- rbind(origin_df, origin_df2,origin_df3,origin_df4,origin_df5,origin_df6,origin_df7,origin_df8,origin_df9,origin_df10)For in class assignment, we are not going to combine the 10 different parquet files but only used one of them for entire analysis
Boundary Data (Coastal Outline)
mpsz_sf <- st_read(dsn = "../data/geospatial/",
layer="MPSZ-2019")Reading layer `MPSZ-2019' from data source `C:\dljyuan\IS415-GAA\data\geospatial' using driver `ESRI Shapefile'
Simple feature collection with 332 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS: WGS 84
sg_sf <- mpsz_sf %>% st_combine()
sg_sf <- st_make_valid(sg_sf)
length(which(st_is_valid(sg_sf) == FALSE))[1] 1
Road Layer Data (Singapore) The following include data from Singapore, Malaysia & Brunei
road <- st_read(dsn="../data/geospatial",
layer="gis_osm_roads_free_1")Convert CRS of Road Data to be the same as the Coastal Outline (SVY21)
road <- st_transform(road, crs = 3414)
sg_sf <- st_transform(sg_sf, crs = 3414)
st_geometry(sg_sf)
st_geometry(road)sg_sf <- st_transform(sg_sf, crs = 3414)
st_geometry(sg_sf)Geometry set for 1 feature
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 2667.537 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
Projected CRS: SVY21 / Singapore TM
MULTIPOLYGON (((18786.82 24302.7, 18786.04 2430...
mpsz_sf <- st_transform(mpsz_sf, crs = 3414)
st_geometry(mpsz_sf)Geometry set for 332 features
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
Projected CRS: SVY21 / Singapore TM
First 5 geometries:
MULTIPOLYGON (((33222.98 29588.13, 33222.51 295...
MULTIPOLYGON (((28481.45 30886.22, 28483.41 308...
MULTIPOLYGON (((28087.34 30541, 28087.54 30540....
MULTIPOLYGON (((14557.7 30447.21, 14562.89 3044...
MULTIPOLYGON (((29542.53 31041.2, 29553.72 3103...
Filter out unnecessary variables
#road <- road[, c("name", "geometry")]
origin_df <- origin_df[, c("trj_id", "rawlat", "rawlng", "accuracy", "weekday", "start_hr", "day")]Extracting road data from Singapore only
singapore_roads <- st_intersection(road, sg_sf)Import SG Road Layers
Data Wrangling
Convert Data CRS to 3414 SVY21
singapore_roads <- st_transform(singapore_roads, crs = 3414)tmap_mode('plot')
tm_shape(singapore_roads) +
tm_lines()
Convert Latitude & Longtitude to Cartisians
origin_df <- st_as_sf(origin_df,
coords = c("rawlng", "rawlat"),
crs=4326) %>%
st_transform(crs = 3414)tm_shape(origin_df) +
tm_dots() 
Convert to Spatial Class
origin <- as_Spatial(origin_df)
sg <- as_Spatial(sg_sf)
#singapore_roads <- as_Spatial(singapore_roads)Convert to Spatial Object
origin_sp <- as(origin, "SpatialPoints")
sg_sp <- as(sg, "SpatialPolygons")Convert to ppp Object
origin_ppp <- as(origin_sp, "ppp")Check for duplicate or overlap points
tmap_mode('plot')
tm_shape(sg_sf) +
tm_polygons() +
tm_shape(origin) +
tm_dots(alpha=0.4,
size=0.02)
any(duplicated(origin_ppp))[1] FALSE
sum(multiplicity(origin_ppp) > 1)[1] 0
Removing Duplicate Points
origin_ppp <- rjitter(origin_ppp,
retry=TRUE,
nsim=1,
drop=TRUE)
any(duplicated(origin_ppp))[1] FALSE
Convert to owin Object
sg_owin <- as(sg_sp, "owin")Combine Grabposisi Data with Coastal Outline
origin_ppp = origin_ppp[sg_owin]
plot(origin_ppp)
Exploratory Data Analysis
Kernel Density Estimation (KDE)
# Convert data to km as our unit of measurement
origin_ppp.km <- rescale(origin_ppp, 1000, "km")Selecting Kernel Method
Baddeley et. (2016) suggested the use of the bw.ppl() algorithm because in ther experience it tends to produce the more appropriate values when the pattern consists predominantly of tight clusters. But they also insist that if the purpose of once study is to detect a single tight cluster in the midst of random noise then the bw.diggle() method seems to work best.
par(mfrow=c(1,2))
plot(density(origin_ppp.km,
sigma=bw.diggle,
edge=TRUE,
kernel="gaussian"),
main = "bw.diggle")
plot(density(origin_ppp.km,
sigma=bw.ppl,
edge=TRUE,
kernel="gaussian"),
main = "bw.ppl")
The map bw.ppl might show sharper contrasts and potentially reveal more nuanced clusters or patterns in the point distribution compared to the bw.diggle map.
par(mfrow=c(2,2))
plot(density(origin_ppp.km,
sigma=bw.ppl,
edge=TRUE,
kernel="gaussian"),
main="Gaussian")
plot(density(origin_ppp.km,
sigma=bw.ppl,
edge=TRUE,
kernel="epanechnikov"),
main="Epanechnikov")
plot(density(origin_ppp.km,
sigma=bw.ppl,
edge=TRUE,
kernel="quartic"),
main="Quartic")
plot(density(origin_ppp.km,
sigma=bw.ppl,
edge=TRUE,
kernel="disc"),
main="Disc")
While the four KDE maps exhibit overall similarities, closer inspection reveals subtle differences in density patterns. For this analysis we will utilize the Gaussian kernel for this analysis.
kde_origin.ppl <- density(origin_ppp.km,
sigma=bw.ppl,
edge=TRUE,
kernel="gaussian")Comparing Fixed & Adaptive KDE
kde_origin_adaptive <- adaptive.density(origin_ppp.km, method="kernel")
par(mfrow=c(1,2))
plot(kde_origin.ppl, main = "Fixed bandwidth")
plot(kde_origin_adaptive, main = "Adaptive bandwidth")
Based on visual analysis of the KDE maps, fixed bandwidth will produce a more accurate and interpretable representation of the underlying data distribution as compared to adaptive bandwidth.
Converting KDE into grid object
gridded_kde_origin_ppl <- as.SpatialGridDataFrame.im(kde_origin.ppl)
kde_origin_ppl_raster <- raster(gridded_kde_origin_ppl)
projection(kde_origin_ppl_raster) <- CRS("+init=EPSG:3414")Visualising Output Map
tm_shape(kde_origin_ppl_raster) +
tm_raster("v", palette = "YlGnBu", title="") +
tm_layout(
legend.position = c("right", "bottom"),
main.title = "Pick Up Points Density",
frame = FALSE)
The map reveals several distinct clusters of high-density points, particularly concentrated in the southeastern and southern quadrants. These clusters could be further analyzed by dividing them into five distinct regions.
Network Kernel Density Estimation (NKDE)
The five high-density regions identified in the standard KDE analysis will be the focus of network kernel density estimation. This will leverage the network’s inherent connections and potentially uncover deeper insights and relationships within these areas.
Retrieving the five study areas Changi
ch <- mpsz_sf %>%
filter(PLN_AREA_N == "CHANGI")
ch <- ch%>%
st_union()
ch <- st_make_valid(ch)
length(which(st_is_valid(ch) == FALSE))[1] 0
Town
town <- mpsz_sf %>%
filter(PLN_AREA_N %in% c("OUTRAM","DOWNTOWN CORE","SINGAPORE RIVER","MUSEUM", "ROCHOR","RIVER VALLEY", "STRAITS VIEW", "MARINA SOUTH"))
town <- town%>%
st_union()
town <- st_make_valid(town)
length(which(st_is_valid(town) == FALSE))[1] 0
Woodlands
wood <- mpsz_sf %>%
filter(PLN_AREA_N == "WOODLANDS")
wood <- wood%>%
st_union()
wood <- st_make_valid(wood)
length(which(st_is_valid(wood) == FALSE))[1] 0
Choa Chu Kang
cck_bp <- mpsz_sf %>%
filter(PLN_AREA_N %in% c("CHOA CHU KANG","BUKIT PANJANG"))
cck_bp <- cck_bp%>%
st_union()
cck_bp <- st_make_valid(cck_bp)
length(which(st_is_valid(cck_bp) == FALSE))[1] 0
Jurong
jg <- mpsz_sf %>%
filter(PLN_AREA_N %in% c("JURONG WEST","JURONG EAST","CLEMENTI"))
jg <- jg%>%
st_union()
jg <- st_make_valid(jg)
length(which(st_is_valid(jg) == FALSE))[1] 0
Converting to SVY21 3414
ch <- st_transform(ch, crs = 3414)
town <- st_transform(town, crs = 3414)
wood <- st_transform(wood, crs = 3414)
cck_bp <- st_transform(cck_bp, crs = 3414)
jg <- st_transform(jg, crs = 3414)Extracting the road layers for each individual area
ch_roads <- st_intersection(singapore_roads, ch)
town_roads <- st_intersection(singapore_roads, town)
wood_roads <- st_intersection(singapore_roads, wood)
cck_bp_roads <- st_intersection(singapore_roads, cck_bp)
jg_roads <- st_intersection(singapore_roads, jg)Visualising the extract spatial data
par(mfrow=c(2,3))
plot(ch, main = "Changi Area")
plot(town, main = "Town Area")
plot(wood, main = "Woodlands Area")
plot(cck_bp, main = "Choa Chu Kang Area")
plot(jg, main = "Jurong Area")
plot(ch_roads[, c("osm_id", "name")])
plot(town_roads[, c("osm_id", "name")])
plot(wood_roads[, c("osm_id", "name")])
plot(cck_bp_roads[, c("osm_id", "name")])
plot(jg_roads[, c("osm_id", "name")])
tmap_arrange(tm_shape(ch_roads) +
tm_lines(col = "red") +
tm_layout(title = "Changi", title.size = 0.8),
tm_shape(town_roads) +
tm_lines(col = "blue") +
tm_layout(title = "Town", title.size = 0.8),
tm_shape(wood_roads) +
tm_lines(col = "green") +
tm_layout(title = "Woodlands", title.size = 0.8),
tm_shape(cck_bp_roads) +
tm_lines(col = "orange") +
tm_layout(title = "Choa Chu Kang", title.size = 0.8),
tm_shape(jg_roads) +
tm_lines(col = "purple") +
tm_layout(title = "Jurong", title.size = 0.8),
asp=2, ncol=3)
Converting the geometry format to linestring.
ch_roads <- st_cast(ch_roads, "LINESTRING")
town_roads <- st_cast(town_roads, "LINESTRING")
wood_roads <- st_cast(wood_roads, "LINESTRING")
cck_bp_roads <- st_cast(cck_bp_roads, "LINESTRING")
jg_roads <- st_cast(jg_roads, "LINESTRING")Preparing lixels object & generating line centre points
lixels_ch <- lixelize_lines(ch_roads,
700,
mindist = 350)
lixels_town <- lixelize_lines(town_roads,
700,
mindist = 350)
lixels_wood <- lixelize_lines(wood_roads,
700,
mindist = 350)
lixels_cck_bp <- lixelize_lines(cck_bp_roads,
700,
mindist = 350)
lixels_jg <- lixelize_lines(jg_roads,
700,
mindist = 350)Retrieving the pickup points for each study area
samples_ch <- lines_center(lixels_ch)
samples_town <- lines_center(lixels_town)
samples_wood <- lines_center(lixels_wood)
samples_cck_bp <- lines_center(lixels_cck_bp)
samples_jg <- lines_center(lixels_jg)origin_ch = st_intersection(origin_df, ch)
origin_town = st_intersection(origin_df, town)
origin_wood = st_intersection(origin_df, wood)
origin_cck_bp = st_intersection(origin_df, cck_bp)
origin_jg = st_intersection(origin_df, jg)Performing NetKDE
# This code is repeated for the remaining 5 area except the variables for [lines, events, w & samples]
densities <- nkde(ch_roads,
events = origin_ch,
w = rep(1,nrow(origin_ch)),
samples = samples_ch,
kernel_name = "quartic",
bw = 300,
div= "bw",
method = "simple",
digits = 1,
tol = 1,
grid_shape = c(1,1),
max_depth = 8,
agg = 5, #we aggregate events within a 5m radius (faster calculation)
sparse = TRUE,
verbose = FALSE)
samples_ch$density <- densities*1000
lixels_ch$density <- densities*1000tmap_mode('plot')
tm_shape(lixels_ch)+
tm_lines(col="density")+
tm_shape(origin_ch)+
tm_dots()
tmap_mode('plot')
tm_shape(lixels_town)+
tm_lines(col="density")+
tm_shape(origin_town)+
tm_dots()
tmap_mode('view')tmap mode set to interactive viewing
tm_shape(lixels_wood)+
tm_lines(col="density")+
tm_shape(origin_wood)+
tm_dots()tmap_mode('plot')
tm_shape(lixels_cck_bp)+
tm_lines(col="density")+
tm_shape(origin_cck_bp)+
tm_dots()
tmap_mode('plot')
tm_shape(lixels_jg)+
tm_lines(col="density")+
tm_shape(origin_jg)+
tm_dots()
Considering the large number/size of data points, Woodlands NetKDE visualization will be the sole area depicted on the OpenStreetMap of Singapore.